This research is inspired by a simple, yet profound question: Does a lack of working street lights make neighborhoods less safe?

For years, city planners have assumed that more light means less crime. However, the history of public lighting is complicated.We recognize that past research (Yared, 2021) shows how street lamps have been used in powerful, and sometimes negative, ways to control people. This reminds us that light is not just a utility; it’s a critical tool tied to social equity and surveillance.

With this context, our project tackles a specific and urgent issue: we are using modern crime data to measure the direct link between failing infrastructure (street lights that are out) and serious violence (shooting incidents) in Manhattan neighborhoods.

By looking at crime that happens during the day compared to crime that happens at night, our goal is to find clear, data-driven answers that can help city officials decide where to spend money to fix lights and how to make every block safer.

Part 1: Interactive map visualizations

Cross-sectional analysis in 2025

This map visualizes the total number of shooting incidents that occurred and total number of “street light out” service requests reported within each Manhattan census tract in 2025.

Longitudinal analysis during 2020 to 2025

This map visualizes the total number of shooting incidents that occurred and total number of “street light out” service requests reported within each Manhattan census tract over the entire study period.

Part 2: Statistical analysis

This part models the association between the count of street lights out and the count of shooting incidents per Manhattan census tract using two methods: Poisson Regression (cross-sectional, 2025 only) and Generalized Estimating Equations (GEE) (longitudinal, 2020-2025).

Cross-sectional analysis in 2025

This analysis uses standard Poisson Regression to estimate the association between the number of street lights out and the number of shooting cases in each census tract for the year 2025.

# Calculate 2025 Shooting Counts Per Tract
shootings_per_tract_25 <- 
  st_join(manhattan_pop, shooting25_sf) |>
  group_by(GEOID) |>
  summarize(
    N_SHOOTINGS_2025 = sum(!is.na(incident_key)),
    .groups = 'drop'
  ) |>
  st_drop_geometry()

# Prepare final 2025 dataset
final_cross_section_2025 <- 
  manhattan_pop |> 
  left_join(shootings_per_tract_25, by = "GEOID") |> 
  left_join(lights_per_tract_25, by = "GEOID") |> 
  mutate(
    N_BROKEN_LIGHTS_2025 = replace_na(N_BROKEN_LIGHTS_2025, 0),
    N_SHOOTINGS_2025 = replace_na(N_SHOOTINGS_2025, 0)
  ) |>
  st_drop_geometry()

# --- POISSON REGRESSION MODEL ---
poisson_model_2025 <- glm(
  N_SHOOTINGS_2025 ~ N_BROKEN_LIGHTS_2025, 
  family = poisson, 
  data = final_cross_section_2025
)

# 1. Print the Model Summary
summary(poisson_model_2025)
## 
## Call:
## glm(formula = N_SHOOTINGS_2025 ~ N_BROKEN_LIGHTS_2025, family = poisson, 
##     data = final_cross_section_2025)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -1.91705    0.19716  -9.723  < 2e-16 ***
## N_BROKEN_LIGHTS_2025  0.04535    0.01503   3.018  0.00254 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 252.75  on 309  degrees of freedom
## Residual deviance: 244.93  on 308  degrees of freedom
## AIC: 365.33
## 
## Number of Fisher Scoring iterations: 6
# 2. Calculate Incidence Rate Ratios (IRR)
exp(coef(poisson_model_2025))
##          (Intercept) N_BROKEN_LIGHTS_2025 
##            0.1470408            1.0463913

Longitudinal analysis during 2020 to 2025

This analysis uses a Generalized Estimating Equation (GEE) Poisson Model to assess the association over multiple years (2020-2025).

all_geoids <- manhattan_pop |> st_drop_geometry() |> dplyr::select(GEOID)
all_years <- tibble(created_year = 2020:2025)

base_grid <- all_geoids |>
  cross_join(all_years) |>
  mutate(GEOID = as.character(GEOID))

# --- CALCULATE ANNUAL SHOOTING COUNTS (2020-2025) ---
shooting20_25_df_filtered <- merged_shooting_df |>
  filter(created_year >= 2020 & created_year <= 2025) |>
  drop_na(latitude, longitude) # Ensure no NA coords here

shooting20_25_sf <- st_as_sf(
  shooting20_25_df_filtered,
  coords = c("longitude", "latitude"),
  crs = 4326
)

shootings_per_year <- st_join(manhattan_pop, shooting20_25_sf) |>
  st_drop_geometry() |>
  filter(!is.na(incident_key)) |> 
  group_by(GEOID, created_year) |>
  summarize(
    N_SHOOTINGS = n(),
    .groups = 'drop'
  ) |>
  mutate(GEOID = as.character(GEOID))


# --- CALCULATE ANNUAL BROKEN LIGHT COUNTS (2020-2025) ---
# Street light data (street11_25_light_sf_hist) prepared in Part 1/Step 6
lights_per_year <- st_join(manhattan_pop, street11_25_light_sf_hist) |>
  st_drop_geometry() |>
  filter(!is.na(descriptor)) |> 
  group_by(GEOID, created_year) |>
  summarize(
    N_BROKEN_LIGHTS = n(),
    .groups = 'drop'
  ) |>
  mutate(GEOID = as.character(GEOID))

# --- MERGE AND CLEAN LONGITUDINAL DATA ---
final_longitudinal_data <- base_grid |>
  left_join(shootings_per_year, by = c("GEOID", "created_year")) |>
  left_join(lights_per_year, by = c("GEOID", "created_year")) |>
  mutate(
    N_SHOOTINGS = replace_na(N_SHOOTINGS, 0),
    N_BROKEN_LIGHTS = replace_na(N_BROKEN_LIGHTS, 0)
  )

# --- GENERALIZED ESTIMATING EQUATION (GEE) MODEL ---
poisson_gee_model <- geepack::geeglm(
  N_SHOOTINGS ~ N_BROKEN_LIGHTS + created_year, 
  data = final_longitudinal_data,
  id = GEOID,
  family = poisson,
  corstr = "unstructured" 
)

# Output Model Summary and Incidence Rate Ratios
summary(poisson_gee_model)
## 
## Call:
## geepack::geeglm(formula = N_SHOOTINGS ~ N_BROKEN_LIGHTS + created_year, 
##     family = poisson, data = final_longitudinal_data, id = GEOID, 
##     corstr = "unstructured")
## 
##  Coefficients:
##                   Estimate    Std.err   Wald Pr(>|W|)    
## (Intercept)     448.829995  56.799488 62.442 2.78e-15 ***
## N_BROKEN_LIGHTS   0.012353   0.004656  7.039  0.00798 ** 
## created_year     -0.222397   0.028100 62.639 2.44e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation structure = unstructured 
## Estimated Scale Parameters:
## 
##             Estimate Std.err
## (Intercept)    2.459  0.2596
##   Link = identity 
## 
## Estimated Correlation Parameters:
##           Estimate Std.err
## alpha.1:2   0.7595 0.09163
## alpha.1:3   0.4816 0.08234
## alpha.1:4   0.4816 0.07975
## alpha.1:5   0.5573 0.08891
## alpha.1:6   0.2690 0.06500
## alpha.2:3   0.6232 0.09262
## alpha.2:4   0.6866 0.08908
## alpha.2:5   0.6844 0.10104
## alpha.2:6   0.3365 0.08126
## alpha.3:4   0.4244 0.06458
## alpha.3:5   0.5023 0.09885
## alpha.3:6   0.2154 0.05049
## alpha.4:5   0.4427 0.05812
## alpha.4:6   0.2284 0.06247
## alpha.5:6   0.2165 0.05758
## Number of clusters:   310  Maximum cluster size: 6
exp(coef(poisson_gee_model))
##     (Intercept) N_BROKEN_LIGHTS    created_year 
##      8.402e+194       1.012e+00       8.006e-01

Part 3: Stratified analysis by day time and night time

This section isolates shooting incidents into two distinct groups based on the time of day—DAY (Daytime) and NIGHT (Nighttime)—using the precise sunrise/sunset calculations performed in the preliminary data preparation. We then repeat the cross-sectional and longitudinal analyses for each group to see if the association with street lights out differs based on natural light conditions.

Cross-sectional analysis in 2025

Day time

# Filter for 2025 day incidents
shooting25_sf_DAY <- shooting_data_DAY |> filter(created_year == 2025) |>
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

# Calculate DAYTIME Counts Per Tract (2025)
shootings_per_tract_25_DAY <- st_join(manhattan_pop, shooting25_sf_DAY) |>
  group_by(GEOID) |>
  summarize(N_SHOOTINGS_2025 = sum(!is.na(incident_key)), .groups = 'drop') |>
  st_drop_geometry()

final_cross_section_2025_DAY <- manhattan_pop |> 
  left_join(shootings_per_tract_25_DAY, by = "GEOID") |> 
  left_join(lights_per_tract_25, by = "GEOID") |> 
  mutate(
    N_BROKEN_LIGHTS_2025 = replace_na(N_BROKEN_LIGHTS_2025, 0),
    N_SHOOTINGS_2025 = replace_na(N_SHOOTINGS_2025, 0)
  ) |>
  st_drop_geometry()

poisson_model_2025_DAY <- glm(
  N_SHOOTINGS_2025 ~ N_BROKEN_LIGHTS_2025, 
  family = poisson, 
  data = final_cross_section_2025_DAY
)
summary(poisson_model_2025_DAY)
print(exp(coef(poisson_model_2025_DAY)))

Night time

# Filter for 2025 night incidents
shooting25_sf_NIGHT <- shooting_data_NIGHT |> filter(created_year == 2025) |>
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

# Calculate NIGHTTIME Counts Per Tract (2025)
shootings_per_tract_25_NIGHT <- st_join(manhattan_pop, shooting25_sf_NIGHT) |>
  group_by(GEOID) |>
  summarize(N_SHOOTINGS_2025 = sum(!is.na(incident_key)), .groups = 'drop') |>
  st_drop_geometry()

final_cross_section_2025_NIGHT <- manhattan_pop |> 
  left_join(shootings_per_tract_25_NIGHT, by = "GEOID") |> 
  left_join(lights_per_tract_25, by = "GEOID") |> 
  mutate(
    N_BROKEN_LIGHTS_2025 = replace_na(N_BROKEN_LIGHTS_2025, 0),
    N_SHOOTINGS_2025 = replace_na(N_SHOOTINGS_2025, 0)
  ) |>
  st_drop_geometry()

poisson_model_2025_NIGHT <- glm(
  N_SHOOTINGS_2025 ~ N_BROKEN_LIGHTS_2025, 
  family = poisson, 
  data = final_cross_section_2025_NIGHT
)
summary(poisson_model_2025_NIGHT)
print(exp(coef(poisson_model_2025_NIGHT)))
Compare cross-sectional results through a table
Poisson Regression Results: Unlit Street Lights vs. Shooting Cases (2025) Stratified by Day Time
Time Period IRR (Incidence Rate Ratios) P-value
Daytime 1.050 0.0340
Nighttime 1.044 0.0309

Longitudinal analysis during 2020 to 2025

Day time

# --- DAYTIME Shooting Counts (2020-2025) ---
shooting_sf_DAY <- st_as_sf(
    shooting_data_DAY |> drop_na(longitude, latitude), # ADD drop_na() here
    coords = c("longitude", "latitude"), 
    crs = 4326
)

shootings_per_year_DAY <- st_join(manhattan_pop, shooting_sf_DAY) |>
  st_drop_geometry() |>
  filter(!is.na(incident_key)) |> 
  group_by(GEOID, created_year) |>
  summarize(N_SHOOTINGS = n(), .groups = 'drop') |>
  mutate(GEOID = as.character(GEOID))

# --- DAYTIME Merged Data ---
final_longitudinal_data_DAY <- base_grid |>
  left_join(shootings_per_year_DAY, by = c("GEOID", "created_year")) |>
  left_join(lights_per_year, by = c("GEOID", "created_year")) |>
  mutate(
    N_SHOOTINGS = replace_na(N_SHOOTINGS, 0),
    N_BROKEN_LIGHTS = replace_na(N_BROKEN_LIGHTS, 0)
  )

poisson_gee_model_DAY <- geepack::geeglm(
  N_SHOOTINGS ~ N_BROKEN_LIGHTS + created_year, 
  data = final_longitudinal_data_DAY,
  id = GEOID, 
  family = poisson,
  corstr = "unstructured" 
)

summary(poisson_gee_model_DAY)
print(exp(coef(poisson_gee_model_DAY)))

Night time

# --- NIGHTTIME Shooting Counts (2020-2025) ---
shooting_sf_NIGHT <- st_as_sf(
    shooting_data_NIGHT |> drop_na(longitude, latitude), # ADD drop_na() here
    coords = c("longitude", "latitude"), 
    crs = 4326
)

shootings_per_year_NIGHT <- st_join(manhattan_pop, shooting_sf_NIGHT) |>
  st_drop_geometry() |>
  filter(!is.na(incident_key)) |> 
  group_by(GEOID, created_year) |>
  summarize(N_SHOOTINGS = n(), .groups = 'drop') |>
  mutate(GEOID = as.character(GEOID))

# --- NIGHTTIME Merged Data ---
final_longitudinal_data_NIGHT <- base_grid |>
  left_join(shootings_per_year_NIGHT, by = c("GEOID", "created_year")) |>
  left_join(lights_per_year, by = c("GEOID", "created_year")) |>
  mutate(
    N_SHOOTINGS = replace_na(N_SHOOTINGS, 0),
    N_BROKEN_LIGHTS = replace_na(N_BROKEN_LIGHTS, 0)
  )

poisson_gee_model_NIGHT <- geepack::geeglm(
  N_SHOOTINGS ~ N_BROKEN_LIGHTS + created_year, 
  data = final_longitudinal_data_NIGHT,
  id = GEOID, 
  family = poisson,
  corstr = "unstructured" 
)

summary(poisson_gee_model_NIGHT)
print(exp(coef(poisson_gee_model_NIGHT)))
Compare GEE results through a table
GEE Poisson Regression Results: Unlit Street Lights vs. Shooting Cases (2020-2025) Stratified by Day Time
Time Period IRR (Incidence Rate Ratios) P-value (Wald Test)
Daylight 1.01 0.008
Nighttime 1.02 0.000